library(tidyverse)
library(plotly)
library(sf)
library(mapview)
library(tigris)
library(censusapi)
library(leaflet)
library(lehdr)
options(
tigris_class = "sf",
tigris_use_cache = TRUE
)
Sys.setenv(CENSUS_KEY="10dcd73d7c043e91bac9fb8d3989cbff54b08790")
Setting up the data
# load demographic data
sj_dem_by_block <- readRDS("/Users/simonespeizer/Documents/2020 Spring Quarter/CEE 218Z/covid19/Cameron_Tenner/sj_socialdistancing_demdata_02.rds") %>%
mutate(`ami_% over 75,000` = (100 - `ami_% under 75,000`), `ami_% over 100,000` = (100 - `ami_% under 100,000`), `% not speaking spanish` = (100 - `% speaking spanish`), `log(% speaking english well)` = log((100-exp(`log(% speaking english < well)`)))) %>%
dplyr::select(-`% leaving home`) # select all but social distancing data because I am going to process it differently than has been done in this
# load the social distancing data
# get San Jose block groups
scc_blockgroups <- block_groups("CA","Santa Clara", cb=F, progress_bar=F)
# Use tracts sent to us by San Jose
sj_tracts <- st_read("/Users/simonespeizer/pCloud Drive/Shared/SFBI/Data Library/San_Jose/CSJ_Census_Tracts/CSJ_Census_Tracts.shp") %>%
st_as_sf() %>%
st_transform(st_crs(scc_blockgroups))
## Reading layer `CSJ_Census_Tracts' from data source `/Users/simonespeizer/pCloud Drive/Shared/SFBI/Data Library/San_Jose/CSJ_Census_Tracts/CSJ_Census_Tracts.shp' using driver `ESRI Shapefile'
## Simple feature collection with 219 features and 9 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: 6112856 ymin: 1869687 xmax: 6255982 ymax: 1996555
## epsg (SRID): 2227
## proj4string: +proj=lcc +lat_1=38.43333333333333 +lat_2=37.06666666666667 +lat_0=36.5 +lon_0=-120.5 +x_0=2000000.0001016 +y_0=500000.0001016001 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=us-ft +no_defs
sj_citycouncil_disticts <- st_read("/Users/simonespeizer/pCloud Drive/Shared/SFBI/Data Library/San_Jose/City Council Districts/CITY_COUNCIL_DISTRICTS.shp") %>%
st_as_sf() %>%
st_transform(st_crs(scc_blockgroups))
## Reading layer `CITY_COUNCIL_DISTRICTS' from data source `/Users/simonespeizer/pCloud Drive/Shared/SFBI/Data Library/San_Jose/City Council Districts/CITY_COUNCIL_DISTRICTS.shp' using driver `ESRI Shapefile'
## Simple feature collection with 10 features and 7 fields
## geometry type: POLYGON
## dimension: XY
## bbox: xmin: 6112856 ymin: 1869687 xmax: 6255982 ymax: 1996555
## epsg (SRID): 2227
## proj4string: +proj=lcc +lat_1=38.43333333333333 +lat_2=37.06666666666667 +lat_0=36.5 +lon_0=-120.5 +x_0=2000000.0001016 +y_0=500000.0001016001 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=us-ft +no_defs
# from code written by others to get SJ blockgroups
sj_blockgroups <-
scc_blockgroups %>%
st_centroid() %>%
st_join(sj_tracts, left = F) %>%
st_join(sj_citycouncil_disticts%>% dplyr::select(DISTRICTS)) %>%
mutate(
DISTRICTS = DISTRICTS %>% factor(levels = c("1","2","3","4","5","6","7","8","9","10"))
) %>%
st_set_geometry(NULL) %>%
left_join(scc_blockgroups%>% dplyr::select(GEOID), by = "GEOID") %>%
st_as_sf() %>%
dplyr::select(GEOID, DISTRICTS)
# the spatial join leaves off two blockgroups which are touching district 9. The following code assigns those to district 9
sj_blockgroups$DISTRICTS[is.na(sj_blockgroups$DISTRICTS)] <- 9
# code from others in the class to get social distancing data
sj_socialdistancing <- readRDS("/Users/simonespeizer/pCloud Drive/Shared/SFBI/Restricted Data Library/Safegraph/covid19analysis/sj_socialdistancing.rds") %>%
mutate(date = date_range_start %>% substr(1,10) %>% as.Date()) %>%
left_join(sj_blockgroups, by = c("origin_census_block_group" = "GEOID")) %>%
filter(!is.na(DISTRICTS))
# obtaining weekends
weekends <-
sj_socialdistancing %>%
filter(!duplicated(date)) %>%
arrange(date) %>%
mutate(
weekend =
ifelse(
(date %>% as.numeric()) %% 7 %in% c(2,3),
T,
F
)
) %>%
dplyr::select(date,weekend)
sj_socialdistancing <-
sj_socialdistancing %>%
left_join(weekends)
# date of the shelter in place order
shelter_start <- "2020-03-16" %>% as.Date()
# get average staying at home on weekdays in January and February, and add trendlines and add column indicating before or after shelter in place
sj_pre_sd_at_home_average <- sj_socialdistancing %>%
filter(weekend == F) %>%
filter(date < as.Date("2020-03-01")) %>%
group_by(origin_census_block_group) %>%
summarize(completely_home_device_count = sum(completely_home_device_count), device_count = sum(device_count)) %>%
mutate(`% Completely at Home` = (completely_home_device_count/device_count*100) %>% round(1), `% leaving home` = (100 - `% Completely at Home`)) %>%
left_join(sj_dem_by_block, by = c("origin_census_block_group" = "blockgroup")) %>%
filter(!is.na(district))
sj_pre_sd_at_home_average[is.na(sj_pre_sd_at_home_average)] <- 0
sj_pre_sd_at_home_average <- sj_pre_sd_at_home_average %>%
mutate(
ami_trendline =
fitted(lm(sj_pre_sd_at_home_average$`% leaving home` ~ sj_pre_sd_at_home_average$`ami_% over 100,000`)),
income_trendline = fitted(lm((sj_pre_sd_at_home_average)$`% leaving home` ~ (sj_pre_sd_at_home_average)$median_income)),
young_trendline = fitted(lm(sj_pre_sd_at_home_average$`% leaving home` ~ sj_pre_sd_at_home_average$`percent less than 30`)),
elderly_trendline = fitted(lm(sj_pre_sd_at_home_average$`% leaving home` ~ sj_pre_sd_at_home_average$`percent elderly`)),
spanish_trendline = fitted(lm(sj_pre_sd_at_home_average$`% leaving home` ~ sj_pre_sd_at_home_average$`% not speaking spanish`)),
english_trendline = fitted(lm(sj_pre_sd_at_home_average$`% leaving home` ~ sj_pre_sd_at_home_average$`log(% speaking english well)`)),
api_trendline = fitted(lm(sj_pre_sd_at_home_average$`% leaving home` ~ sj_pre_sd_at_home_average$`% speaking api`))) %>%
cbind(`Before or After Shelter-in-Place` = "Before shelter-in-place")
# same for since shelter in place
sj_post_sd_at_home_average <- sj_socialdistancing %>%
filter(weekend == F) %>%
filter(date > shelter_start) %>%
group_by(origin_census_block_group) %>%
summarize(completely_home_device_count = sum(completely_home_device_count), device_count = sum(device_count)) %>%
mutate(`% Completely at Home` = (completely_home_device_count/device_count*100) %>% round(1), `% leaving home` = (100 - `% Completely at Home`)) %>%
left_join(sj_dem_by_block, by = c("origin_census_block_group" = "blockgroup")) %>%
filter(!is.na(district))
sj_post_sd_at_home_average[is.na(sj_post_sd_at_home_average)] <- 0
sj_post_sd_at_home_average <- sj_post_sd_at_home_average %>%
mutate(
ami_trendline =
fitted(lm(sj_post_sd_at_home_average$`% leaving home` ~ sj_post_sd_at_home_average$`ami_% over 100,000`)),
income_trendline = fitted(lm((sj_post_sd_at_home_average)$`% leaving home` ~ (sj_post_sd_at_home_average)$median_income)),
young_trendline = fitted(lm(sj_post_sd_at_home_average$`% leaving home` ~ sj_post_sd_at_home_average$`percent less than 30`)),
elderly_trendline = fitted(lm(sj_post_sd_at_home_average$`% leaving home` ~ sj_post_sd_at_home_average$`percent elderly`)),
spanish_trendline = fitted(lm(sj_post_sd_at_home_average$`% leaving home` ~ sj_post_sd_at_home_average$`% not speaking spanish`)),
english_trendline = fitted(lm(sj_post_sd_at_home_average$`% leaving home` ~ sj_post_sd_at_home_average$`log(% speaking english well)`)),
api_trendline = fitted(lm(sj_post_sd_at_home_average$`% leaving home` ~ sj_post_sd_at_home_average$`% speaking api`))) %>%
cbind(`Before or After Shelter-in-Place` = "After shelter-in-place")
sj_dem_distancing_combined <- rbind(sj_pre_sd_at_home_average, sj_post_sd_at_home_average)
# convert the before/after column to factor
sj_dem_distancing_combined$`Before or After Shelter-in-Place` <- factor(sj_dem_distancing_combined$`Before or After Shelter-in-Place`, levels = c("Before shelter-in-place", "After shelter-in-place"))
saveRDS(sj_dem_distancing_combined, "/Users/simonespeizer/Documents/2020 Spring Quarter/CEE 218Z/covid19/Simone_Speizer/sj_socialdistancing_demdata_prepost.rds")
Now make plots.
fig_ami <-
plot_ly (sj_dem_distancing_combined) %>%
add_trace(
x = ~`ami_% over 100,000`,
y = ~`% leaving home`,
frame = ~`Before or After Shelter-in-Place`,
type = 'scatter',
mode = 'markers',
showlegend = F
) %>%
add_trace(
x = ~`ami_% over 100,000`,
y = ~ami_trendline,
type = 'scatter',
mode = 'lines',
line = list(size = 5, color = 'rgba(255, 165, 0, 1)'),
frame = ~`Before or After Shelter-in-Place`,
showlegend = F
) %>%
animation_button(visible = F) %>%
layout(xaxis = list(title = '% of households making over $100,000'), yaxis = list(title = '% leaving home'))
fig_ami
fig_income <-
plot_ly (sj_dem_distancing_combined %>% filter(median_income > 0)) %>%
add_trace(
x = ~median_income,
y = ~`% leaving home`,
frame = ~`Before or After Shelter-in-Place`,
type = 'scatter',
mode = 'markers',
showlegend = F
) %>%
add_trace(
x = ~median_income,
y = ~income_trendline,
type = 'scatter',
mode = 'lines',
line = list(size = 5, color = 'rgba(255, 165, 0, 1)'),
frame = ~`Before or After Shelter-in-Place`,
showlegend = F
) %>%
animation_button(visible = F) %>%
layout(xaxis = list(title = 'median income'), yaxis = list(title = '% leaving home'))
fig_income
fig_english <-
plot_ly (sj_dem_distancing_combined) %>%
add_trace(
x = ~`log(% speaking english well)`,
y = ~`% leaving home`,
frame = ~`Before or After Shelter-in-Place`,
type = 'scatter',
mode = 'markers',
showlegend = F
) %>%
add_trace(
x = ~`log(% speaking english well)`,
y = ~english_trendline,
type = 'scatter',
mode = 'lines',
line = list(size = 5, color = 'rgba(255, 165, 0, 1)'),
frame = ~`Before or After Shelter-in-Place`,
showlegend = F
) %>%
animation_button(visible = F) %>%
layout(xaxis = list(title = '% speaking English well'), yaxis = list(title = '% leaving home'))
fig_english
fig_spanish <-
plot_ly (sj_dem_distancing_combined) %>%
add_trace(
x = ~`% not speaking spanish`,
y = ~`% leaving home`,
frame = ~`Before or After Shelter-in-Place`,
type = 'scatter',
mode = 'markers',
showlegend = F
) %>%
add_trace(
x = ~`% not speaking spanish`,
y = ~spanish_trendline,
type = 'scatter',
mode = 'lines',
line = list(size = 5, color = 'rgba(255, 165, 0, 1)'),
frame = ~`Before or After Shelter-in-Place`,
showlegend = F
) %>%
animation_button(visible = F) %>%
layout(xaxis = list(title = '% NOT speaking Spanish'), yaxis = list(title = '% leaving home'))
fig_spanish
fig_api <-
plot_ly (sj_dem_distancing_combined) %>%
add_trace(
x = ~`% speaking api`,
y = ~`% leaving home`,
frame = ~`Before or After Shelter-in-Place`,
type = 'scatter',
mode = 'markers',
showlegend = F
) %>%
add_trace(
x = ~`% speaking api`,
y = ~api_trendline,
type = 'scatter',
mode = 'lines',
line = list(size = 5, color = 'rgba(255, 165, 0, 1)'),
frame = ~`Before or After Shelter-in-Place`,
showlegend = F
) %>%
animation_button(visible = F) %>%
layout(xaxis = list(title = '% speaking API'), yaxis = list(title = '% leaving home'))
fig_api
fig_elderly <-
plot_ly (sj_dem_distancing_combined) %>%
add_trace(
x = ~`percent elderly`,
y = ~`% leaving home`,
frame = ~`Before or After Shelter-in-Place`,
type = 'scatter',
mode = 'markers',
showlegend = F
) %>%
add_trace(
x = ~`percent elderly`,
y = ~elderly_trendline,
type = 'scatter',
mode = 'lines',
line = list(size = 5, color = 'rgba(255, 165, 0, 1)'),
frame = ~`Before or After Shelter-in-Place`,
showlegend = F
) %>%
animation_button(visible = F) %>%
layout(xaxis = list(title = '% elderly'), yaxis = list(title = '% leaving home'))
fig_elderly
fig_young <-
plot_ly (sj_dem_distancing_combined) %>%
add_trace(
x = ~`percent less than 30`,
y = ~`% leaving home`,
frame = ~`Before or After Shelter-in-Place`,
type = 'scatter',
mode = 'markers',
showlegend = F
) %>%
add_trace(
x = ~`percent less than 30`,
y = ~young_trendline,
type = 'scatter',
mode = 'lines',
line = list(size = 5, color = 'rgba(255, 165, 0, 1)'),
frame = ~`Before or After Shelter-in-Place`,
showlegend = F
) %>%
animation_button(visible = F) %>%
layout(xaxis = list(title = '% younger than 30'), yaxis = list(title = '% leaving home'))
fig_young